
# load required libraries
library(NMOF)
library(readxl)
library(sm)
library(mgcv)
library(olsrr)

# set working directory
setwd("X:\\Access\\Gas\\DRP Methods")

# source drp functions
source("code/drp_functions.R")

# set input values
targetTenor<-10

# load and shape example data
yieldData<-read_excel("data\\GGT DRP annual update - ERA Revised Bond Yield Approach (29 June 2018)_TOY.xlsx",sheet="Gaussian Kernel",skip=1)

yieldData<-yieldData[-nrow(yieldData),c(2:3,7)]

names(yieldData)<-c("tenor","yield","faceValue")

yieldData$tenor<-as.numeric(yieldData$tenor)

# solve kernel, NS and NSS models of data
costDebt<-estimateCostOfDebt()

costDebtKernel<-sm::sm.regression(yieldData$tenor,yieldData$yield ,h=1.5,
                                    weights=yieldData$faceValue,display="none",
                                    eval.points=yieldData$tenor)$estimate

# plot these solutions
plot(yieldData$tenor,yieldData$yield,
     main="Bond Yield Estimators",
     xlab="Tenor (years)",ylab="Yield (AUD annualised %)",
     col="orange")
lines(yieldData$tenor,NMOF::NS(costDebt$NSparameters,yieldData$tenor),col="black")
lines(yieldData$tenor,NMOF::NSS(costDebt$NSSparameters,yieldData$tenor),col="red")
lines(yieldData$tenor,costDebtKernel,col="purple")
abline(v=10,lty=2,col="blue")
legend(20,y=4.45,legend=c("Gaussian kernel","Nelson-Siegel","Svensson","Target tenor"),
       lwd=c(1,1,1,1),lty=c(1,1,1,2),col=c("purple","black","red","blue"))

lines(yieldData$tenor,NS(c(0,2.437,14.22,1/0.054686),yieldData$tenor),col="green")
((yieldData$yield - NS(c(0,2.437,14.22,0.055),yieldData$tenor))^2)


# define a 'true' model for purposes of simulating bond yields when
#  thickening and/or thinning the bond sample to understand the 
#  impact of data support on the accuracy of the cost of debt
#  estimate.
# Base this model on the NSS model above

# Two stages in simulation: 
# 1) first define the density of points observed
smModel<-sm::sm.density(yieldData$tenor,h=1.5,display="none",
                        eval.points=yieldData$tenor,positive=TRUE)
empiricalCum<-data.frame(density=c(0,smModel$estimate),tenor=c(0,smModel$eval.points))
empiricalCum$cumDensity<-cumsum(empiricalCum$density)/sum(empiricalCum$density)

# 2) then predict a value conditional on a random draw from the density defined in 1) above.
#  in the data
trueModel<-costDebt$NSSparameters
trueCostOfDebt<-NMOF::NSS(trueModel,targetTenor)
trueResiduals<- yieldData$yield - NMOF::NSS(trueModel,yieldData$tenor)

# in this instance the residuals are not time dependent
m1<-lm(trueResiduals~yieldData$tenor)
olsrr::ols_test_breusch_pagan(m1)
# reject heteroskedasticity

# calculate sd for random draws about the NSS model
ols_sd<-sd(trueResiduals)

# loop through different data sample sizes, with M solutions at each sample size
# we can also shrink the density smModel away from the tenor.
M<-100

dataSampleSize<-10:100
measureMat<-matrix(NA,nrow=length(dataSampleSize),ncol=M)
kernelAccuracy<-NSAccuracy<-NSSAccuracy<-measureMat

faffTime<-system.time(
for(i in 1:length(dataSampleSize)) {
  nSample<-dataSampleSize[i]
  for(j in 1:M) {
    drawUnif<-runif(nSample)
    drawFaceValue<-sample(yieldData$faceValue,nSample,replace=TRUE)
    drawDensity<-approx(x=empiricalCum$cumDensity,y=empiricalCum$tenor,xout=drawUnif)$y
    drawResid<-rnorm(nSample,0,ols_sd)
    drawYield<-NMOF::NSS(trueModel,drawDensity)+drawResid
    # simulations will produce on occasion some negative bond yields ...
    measureMat[i,j]<-measureSupport(inData=drawDensity)
    tempData<-data.frame(tenor=drawDensity,yield=drawYield,faceValue=drawFaceValue)
    tempSolution<-estimateCostOfDebt(inData=tempData)
    kernelAccuracy[i,j]<-tempSolution$solutions[1]
    NSAccuracy[i,j]<-tempSolution$solutions[2]
    NSSAccuracy[i,j]<-tempSolution$solutions[3]
  } 
  cat("this is iteration: ",i," of ",length(dataSampleSize),"\n")
}
)
save.image("RData/support_20181009.RData")

#i5-6300U @2.40Ghz  4GB RAM surface pro
# 6 hours to run

# aggregate results
allSims<-data.frame(sampleSize=dataSampleSize,
                    supportMeasure=as.numeric(measureMat),
                    kernelEstimate=as.numeric(kernelAccuracy),
               NSEstimate=as.numeric(NSAccuracy),
               NSSEstimate=as.numeric(NSSAccuracy))

allSims$bondYieldAll<-apply(allSims[,-(1:2)],1,mean)
allSims$estimateVar<-apply(allSims[,3:5],1,sd)

# distance from cost of debt
allSims[,-c(1:2,ncol(allSims))]<-abs(allSims[,-c(1:2,ncol(allSims))]-trueCostOfDebt)

simFrame<-data.frame(supportMeasure=
                       seq(min(allSims$supportMeasure),max(allSims$supportMeasure),
                           length=200))

# smooth the error vs support measure relationship for 
#  each estimation method
kernelGAM<-mgcv::gam(kernelEstimate~s(supportMeasure),data=allSims)
kernelPredict<-predict(kernelGAM, newdata=simFrame)

NSGAM<-mgcv::gam(NSEstimate~s(supportMeasure),data=allSims)
NSPredict<-predict(NSGAM, newdata=simFrame)

NSSGAM<-mgcv::gam(NSSEstimate~s(supportMeasure),data=allSims)
NSSPredict<-predict(NSSGAM, newdata=simFrame)

bondYieldGAM<-mgcv::gam(bondYieldAll~s(supportMeasure),data=allSims)
bondYieldPredict<-predict(bondYieldGAM, newdata=simFrame)

plot(simFrame$supportMeasure,kernelPredict,type="l",col="purple",
     xlab="Kernel support measure",
     ylab="Absolute difference from true cost of debt")
lines(simFrame$supportMeasure,NSPredict,col="black")
lines(simFrame$supportMeasure,NSSPredict,col="red")
lines(simFrame$supportMeasure,bondYieldPredict,col="blue",lwd=2)
legend(2,y=1,legend=c("Gaussian kernel","Nelson-Siegel","Svensson","Average"),
       lwd=c(1,1,1,2),lty=c(1,1,1,1),col=c("purple","black","red","blue"))

# equate support measure with sample size
sampleSizeGAM<-lm(supportMeasure~sampleSize,data=allSims)
sampleSizePredict<-predict(sampleSizeGAM, 
                       newdata=data.frame(sampleSize=dataSampleSize))
sampleSize1<-dataSampleSize[which.min(abs(sampleSizePredict-1))]
sampleSize2<-dataSampleSize[which.min(abs(sampleSizePredict-2))]

plot(dataSampleSize,sampleSizePredict,type="l",col="purple",
     xlab="Number of bonds",
     ylab="Kernel support measure")
abline(h=1,lty=2)
abline(h=2,lty=2)
abline(v=sampleSize1,lty=2)
abline(v=sampleSize2,lty=2)

######
summary(allSims$estimateVar)
varGAM<-mgcv::gam(estimateVar~s(supportMeasure),data=allSims)
varPredict<-predict(varGAM, newdata=simFrame)

plot(simFrame$supportMeasure~varPredict*100,type="l",
     xlab="Kernel support measure",
     ylab="Standard deviation among estimates (basis points)")

allSims$greaterThan30<-ifelse(allSims$estimateVar>0.3,1,0)
binaryGAM<-mgcv::gam(allSims$greaterThan30~s(supportMeasure),family=binomial,data=allSims)
binaryPredict<-predict(binaryGAM,newdata=simFrame,type="response")

plot(simFrame$supportMeasure,binaryPredict,type="l",
     xlab="Kernel support measure",
     ylab="Prob(standard deviation > 30 basis points)")
abline(h=0.05,lty=2)
abline(h=0.025,lty=2)
abline(h=0.01,lty=2)